import scrapy
from cmjPro.items import CmjproItem
class CmjSpider(scrapy.Spider):
    """Spider that collects author/content pairs from the start page."""

    name = 'cmj'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://www.qiushibaike.com/text/']

    def parse(self, response):
        """Extract the author name and text content of every entry.

        Each child ``div`` of the ``col1 old-style-col1`` container is
        treated as one entry.

        Returns:
            A list of dicts, one per entry, with the keys ``'author'``
            and ``'content'``. ``'author'`` is ``None`` when the author
            XPath matches nothing; ``'content'`` is the concatenation of
            every matched text node (an empty string if none match).
        """
        author_xpath = './div[1]/a[2]/h2/text()'
        content_xpath = './a/div/span[1]//text()'

        entries = response.xpath('//div[@class="col1 old-style-col1"]/div')

        # xpath() returns a list of Selector objects. extract_first() pulls
        # the string out of the first Selector only, whereas extract() pulls
        # the string out of every Selector in the list, hence the join.
        return [
            {
                'author': entry.xpath(author_xpath).extract_first(),
                'content': ''.join(entry.xpath(content_xpath).extract()),
            }
            for entry in entries
        ]





            